#--
# This file is part of uhferret.
#
# Author::    Peter Lane
# Copyright:: Copyright 2012-20, Peter Lane.
# License::   GPLv3
#
# uhferret is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# uhferret is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with uhferret.  If not, see <http://www.gnu.org/licenses/>.

# TODO: Make the conversions etc work on Windows as well as Linux.

#
# A collection of methods to support checking and converting different 
# document file types.
#
module Utils

  # Check if the given command is present on the system.
  #
  # The directories listed in the PATH environment variable are searched
  # directly for an executable file, so no external +which+ program or
  # shell is needed.  Any suffixes listed in PATHEXT (set on Windows) are
  # also tried.  A command containing a directory separator is checked
  # as a path in its own right.
  #
  # Returns true if an executable was found, false otherwise (including
  # for a nil or empty command).
  def Utils.command_present?(command)
    name = command.to_s
    return false if name.empty?

    suffixes = [""] + ENV.fetch("PATHEXT", "").split(";")
    candidates =
      if name.include?(File::SEPARATOR)
        [name]
      else
        ENV.fetch("PATH", "").split(File::PATH_SEPARATOR).map do |dir|
          # an empty PATH entry conventionally means the current directory
          File.join(dir.empty? ? "." : dir, name)
        end
      end

    candidates.any? do |path|
      suffixes.any? do |suffix|
        executable = path + suffix
        File.file?(executable) && File.executable?(executable)
      end
    end
  end

  # List of permitted compressed file extensions (without leading dot).
  # An extension is only included if the command needed to unpack it
  # is available on this system when the file is loaded.
  CompressedFileExtensions = []
  [
    ["unrar", %w[rar]],
    ["tar",   %w[tar.bz2 tar.gz tbz2 tgz]],
    ["unzip", %w[zip]]
  ].each do |command, extensions|
    CompressedFileExtensions.concat(extensions) if Utils.command_present?(command)
  end

  # Return true if the filename has a file ending for code
  def Utils.is_code?(filename)
    [".c", ".h", ".cpp", ".java"].include?(File.extname(filename))
  end

  # Return true if the filename has a valid extension: that is, it is
  # code, plain text (.txt), a pdf document or a word-processor document.
  def Utils.valid_document?(filename)
    Utils.is_code?(filename) ||
      File.extname(filename) == ".txt" ||
      Utils.is_pdf_document?(filename) ||
      Utils.is_wp_document?(filename)
  end

  # Return true if the filename ends with .pdf and so is a pdf document.
  def Utils.is_pdf_document?(filename)
    File.extname(filename) == ".pdf"
  end

  # Return true if the filename ends with a known word processor extension.
  def Utils.is_wp_document?(filename)
    [".doc", ".rtf", ".docx", ".abw"].include?(File.extname(filename))
  end

  # Use pdftotext to convert the pdf file to text.
  #
  # Returns the converted filename, obtained by adding .txt to the given
  # filename.  If pdftotext is not available, nothing is converted and
  # the given filename is returned unchanged.  The result of the
  # conversion itself is not checked.
  def Utils.convert_pdf_document(filename)
    return filename unless Utils.command_present?("pdftotext")

    output_filename = "#{filename}.txt"
    # Arguments are passed as a list, so no shell is involved and
    # filenames containing spaces or shell metacharacters are safe.
    system("pdftotext", "-layout", "-enc", "Latin1", "-nopgbrk",
           filename.to_s, output_filename, out: File::NULL)
    output_filename
  end

  # Use abiword to convert the word-processed file to text.
  #
  # Returns the converted filename, obtained by adding .txt to the given
  # filename.  If abiword is not available, nothing is converted and
  # the given filename is returned unchanged.  The result of the
  # conversion itself is not checked.
  def Utils.convert_wp_document(filename)
    return filename unless Utils.command_present?("abiword")

    output_filename = "#{filename}.txt"
    # Arguments are passed as a list, so no shell is involved and
    # filenames containing spaces or shell metacharacters are safe.
    system("abiword", "--to=txt", filename.to_s, "-o", output_filename,
           out: File::NULL)
    output_filename
  end
end
